Descriptive Statistics of NYC Shooting Data

library(ggplot2)
library(readxl)
library(readr)
library(tidyverse)
library(dplyr)
library(sf)
library(plotly)
library(geojsonio)
library(knitr)
library(tidyr)
# Load the dataset once and expose it under both names used in this document:
# `df_descriptive` feeds the summary tables, `data_final` the map sections.
df_descriptive <- read_csv("data_final.csv")
data_final <- df_descriptive

EDA sorted by neighborhood

Table showing Top 10 NTAs with the Highest Total Shooting Incidents Number (2017-2023)

# Citywide ranking of NTAs by number of shooting records.
# Each row of `df_descriptive` is counted as one incident; the population,
# poverty and education figures are taken from the first row of each NTA.
top_10_nta_overall <- df_descriptive %>%
  group_by(BORO, NTA) %>%
  summarise(
    total_incidents = n(),
    total_population = first(Total_population_nta),
    percent_poverty = first(Percent_poverty),
    percent_education = first(Percent_education)
  ) %>%
  ungroup() %>%
  arrange(desc(total_incidents)) %>%
  slice_head(n = 10) %>%
  select(BORO, NTA, total_incidents:percent_education)

# Render the ranking as a table, rounding numeric columns to 2 decimals
knitr::kable(
  top_10_nta_overall,
  caption = "Top 10 NTAs with the Highest Total Shooting Incidents (2017-2023)",
  digits = 2
)
Top 10 NTAs with the Highest Total Shooting Incidents (2017-2023)
BORO NTA total_incidents total_population percent_poverty percent_education
BROOKLYN Brownsville 350 60470 37.2 76.9
BROOKLYN Bedford-Stuyvesant (East) 323 84653 22.2 87.8
MANHATTAN Harlem (North) 253 83327 25.1 82.9
BROOKLYN Crown Heights (North) 235 85275 20.5 84.1
MANHATTAN East Harlem (North) 229 64655 32.3 75.9
BRONX Mott Haven-Port Morris 207 57718 43.7 62.0
BROOKLYN Bedford-Stuyvesant (West) 207 89189 26.0 84.8
BROOKLYN East New York-New Lots 205 53004 25.3 82.6
BRONX Concourse-Concourse Village 169 69387 31.9 70.5
BRONX Mount Eden-Claremont (West) 166 49651 31.8 67.9

Table showing Top 10 NTAs with the Highest Annual Incident Rate

# Top 10 residential NTA-years with the highest annual incident rate, citywide.
#
# `df_descriptive` has many rows per NTA (the totals tables count rows as
# incidents), so it is collapsed to one row per NTA and year before ranking.
# The key is (NTA, Year) rather than the rate value itself: deduplicating on
# the rate would merge two different NTA-years that happen to share the same
# rate and silently drop one of them.
# NOTE(review): assumes `incident_rate_by_year_nta` is constant within an
# NTA-year - confirm against the data preparation step.
top_10_incident_rate <- df_descriptive %>%
  filter(NTAType == "Residential") %>%
  distinct(NTA, Year, .keep_all = TRUE) %>%
  arrange(desc(incident_rate_by_year_nta)) %>%
  slice_head(n = 10)

# Keep only the columns shown in the table, with reader-friendly names
top_10_nta_incident_rate <- top_10_incident_rate %>%
  select(
    NTA, BORO, Year,
    incident_rate = incident_rate_by_year_nta,
    total_population = Total_population_nta,
    percent_poverty = Percent_poverty,
    percent_education = Percent_education
  )

# Display the table; the caption describes a citywide ranking, not a
# per-borough one
kable(
  top_10_nta_incident_rate,
  digits = 5,
  caption = "Top 10 NTAs with the Highest Annual Incident Rate"
)
Top 10 NTAs with the Highest Incident Rate in Each Borough
NTA BORO Year incident_rate total_population percent_poverty percent_education
Claremont Village-Claremont (East) BRONX 2021 0.18735 24553 43.2 61.7
Brownsville BROOKLYN 2020 0.15380 60470 37.2 76.9
Tremont BRONX 2020 0.11820 32150 36.3 71.4
Inwood MANHATTAN 2021 0.11571 36299 15.3 77.0
East New York-New Lots BROOKLYN 2020 0.10754 53004 25.3 82.6
Hunts Point BRONX 2021 0.10574 15131 34.3 72.2
Mount Eden-Claremont (West) BRONX 2021 0.10070 49651 31.8 67.9
Brownsville BROOKLYN 2022 0.09922 60470 37.2 76.9
Brownsville BROOKLYN 2021 0.09757 60470 37.2 76.9
Tremont BRONX 2023 0.09642 32150 36.3 71.4

Table showing Top 10 total incident NTA (2017-2023) in Each Borough

# Side-by-side table of the 10 NTAs with the most shooting records in each
# borough: one column per borough, one row per rank.
#
# Only rows missing the grouping keys are dropped. A bare `drop_na()` removes
# every row with an NA in *any* column, which undercounts incidents and makes
# this ranking disagree with the per-borough totals tables.
incident_by_nta_borough <- df_descriptive %>%
  drop_na(BORO, NTA) %>%
  count(BORO, NTA, name = "total_incidents") %>%
  group_by(BORO) %>%
  # with_ties = FALSE guarantees exactly (at most) 10 rows per borough
  slice_max(order_by = total_incidents, n = 10, with_ties = FALSE) %>%
  mutate(row_num = row_number()) %>%
  ungroup() %>%
  select(row_num, BORO, NTA) %>%
  # (row_num, BORO) is unique, so every cell holds a single NTA name and no
  # unnesting of list-columns is required
  pivot_wider(names_from = BORO, values_from = NTA) %>%
  arrange(row_num)

# Display the table in the desired format
kable(
  incident_by_nta_borough,
  caption = "Top 10 NTAs with Total Shooting Incidents in Each Borough (2017-2023)"
)
Top 10 NTAs with Total Shooting Incidents in Each Borough (2017-2023)
row_num BRONX BROOKLYN MANHATTAN QUEENS STATEN ISLAND
1 Mott Haven-Port Morris Brownsville East Harlem (North) Far Rockaway-Bayswater St. George-New Brighton
2 Mount Hope Bedford-Stuyvesant (East) Harlem (North) Baisley Park Tompkinsville-Stapleton-Clifton-Fox Hills
3 Tremont Crown Heights (North) East Harlem (South) Richmond Hill West New Brighton-Silver Lake-Grymes Hill
4 Mount Eden-Claremont (West) East New York-New Lots Harlem (South) Jamaica Mariner’s Harbor-Arlington-Graniteville
5 Concourse-Concourse Village East Flatbush-Remsen Village Washington Heights (South) Rockaway Beach-Arverne-Edgemere Annadale-Huguenot-Prince’s Bay-Woodrow
6 Williamsbridge-Olinville East New York (North) Chelsea-Hudson Yards St. Albans Port Richmond
7 Melrose Bedford-Stuyvesant (West) Hamilton Heights-Sugar Hill South Ozone Park Rosebank-Shore Acres-Park Hill
8 Belmont Canarsie Inwood Elmhurst Grasmere-Arrochar-South Beach-Dongan Hills
9 Longwood Coney Island-Sea Gate Chinatown-Two Bridges South Jamaica Great Kills-Eltingville
10 Fordham Heights East New York-City Line Washington Heights (North) Astoria (East)-Woodside (North) New Dorp-Midland Beach

EDA sorted by NYC BOROs

Process of data manipulation

Plot of Total Number of Incidents Across NYC BOROs (2017-2023)

# Interactive borough map of total shooting incidents (2017-2023).
# NOTE(review): `boro_map_data` is not created in the visible part of this
# document; it must already exist with `boro_name` and `Number_of_Incidents`
# columns before this chunk runs.
data_final <- read_csv("data_final.csv")

# Hover label shown for each borough ("<br>" is rendered as a line break)
boro_map_data <- boro_map_data %>%
  mutate(
    hover_text = paste("Borough:", boro_name, "<br>Total Incidents:", Number_of_Incidents)
  )

# Create the interactive plot with click functionality
plot <- plot_ly(
  data = boro_map_data,
  type = "scattermapbox",
  split = ~boro_name,  # Separate polygons by boroughs
  color = ~Number_of_Incidents,  # Color based on the number of incidents
  colors = "viridis",  # Use a color scale
  text = ~hover_text,  # Display hover text
  hoverinfo = "text",
  marker = list(size = 8, opacity = 0.7)
) %>%
  layout(
    title = "Total Number of Incidents Across NYC BOROs (2017-2023)",
    mapbox = list(
      style = "carto-positron",  # Base map style
      center = list(lon = -74.00, lat = 40.7128),  # Center map on NYC
      zoom = 9
    )
  )

# Register the click event and attach a JavaScript handler that shows the
# clicked point's hover text (borough name and incident count) in an alert
plot <- plot %>%
  event_register("plotly_click") %>%
  htmlwidgets::onRender("
    function(el, x) {
      el.on('plotly_click', function(d) {
        var point = d.points[0];
        var text = point.text;
        alert('You clicked on: ' + text);
      });
    }
  ")

# Display the interactive plot
plot

The map shows the total number of incidents across NYC boroughs (2017–2023), with each borough represented by a distinct color. A gradient is used to indicate the magnitude of incidents, with darker shades corresponding to higher counts, ranging from 0 to over 3,000 incidents. Each borough is outlined and filled with its respective color, making it easy to distinguish. The legend on the right identifies the boroughs (Bronx, Brooklyn, Manhattan, Queens, Staten Island) and aligns with the color gradient to show the number of incidents. This visualization highlights geographical disparities in incident frequency across the boroughs, aiding in understanding spatial distribution patterns.

Top 10 NTAs with the Highest Total Shooting Incidents in each boro & Top 10 NTAs with the Highest Annual Incident Rate in each boro

MANHATTAN

Table showing Top 10 NTAs with the Highest Total Shooting Incidents (2017-2023) in MANHATTAN

# Manhattan NTAs ranked by number of shooting records (one row = one incident).
# Demographic columns are taken from the first row of each NTA.
top_10_nta_overall_manhattan <- df_descriptive %>%
  filter(BORO == "MANHATTAN") %>%
  group_by(BORO, NTA) %>%
  summarise(
    total_incidents = n(),
    total_population = first(Total_population_nta),
    percent_poverty = first(Percent_poverty),
    percent_education = first(Percent_education)
  ) %>%
  ungroup() %>%
  arrange(desc(total_incidents)) %>%
  slice_head(n = 10) %>%
  select(-BORO)

# Render the ranking as a table, rounding numeric columns to 2 decimals
knitr::kable(
  top_10_nta_overall_manhattan,
  caption = "Top 10 NTAs with the Highest Total Shooting Incidents in MANHATTAN (2017-2023)",
  digits = 2
)
Top 10 NTAs with the Highest Total Shooting Incidents in MANHATTAN (2017-2023)
NTA total_incidents total_population percent_poverty percent_education
Harlem (North) 253 83327 25.1 82.9
East Harlem (North) 229 64655 32.3 75.9
East Harlem (South) 145 59814 29.0 78.3
Harlem (South) 130 47113 18.8 87.4
Inwood 105 36299 15.3 77.0
Washington Heights (South) 88 72037 19.9 70.3
Washington Heights (North) 67 71842 14.8 79.3
Hamilton Heights-Sugar Hill 54 49410 24.0 78.6
Manhattanville-West Harlem 46 22183 26.7 77.4
East Village 41 71436 24.3 87.7

Table showing Top 10 NTAs with the Highest Annual Shooting Incident Rate in MANHATTAN

# Top 10 residential NTA-years in Manhattan by annual incident rate.
#
# Rows are collapsed to one per (NTA, Year) after filtering. Deduplicating on
# the rate value itself would merge distinct NTA-years that share a rate and,
# because it ran before the borough filter, could keep a row from another
# borough that is then filtered away.
# NOTE(review): assumes `incident_rate_by_year_nta` is constant within an
# NTA-year - confirm against the data preparation step.
top_10_incident_rate_manhattan <- df_descriptive %>%
  filter(BORO == "MANHATTAN", NTAType == "Residential") %>%
  distinct(NTA, Year, .keep_all = TRUE) %>%
  arrange(desc(incident_rate_by_year_nta)) %>%
  slice_head(n = 10)

# Keep only the columns shown in the table, with reader-friendly names
top_10_nta_incident_rate_manhattan <- top_10_incident_rate_manhattan %>%
  select(
    NTA, Year,
    incident_rate = incident_rate_by_year_nta,
    total_population = Total_population_nta,
    percent_poverty = Percent_poverty,
    percent_education = Percent_education
  )

# Display the table in the desired format
kable(
  top_10_nta_incident_rate_manhattan,
  digits = 6,
  caption = "Top 10 NTAs with the Highest Annual Incident Rate in MANHATTAN"
)
Top 10 NTAs with the Highest Annual Incident Rate in MANHATTAN
NTA Year incident_rate total_population percent_poverty percent_education
Inwood 2021 0.115706 36299 15.3 77.0
East Harlem (North) 2022 0.086614 64655 32.3 75.9
Harlem (North) 2021 0.080406 83327 25.1 82.9
Inwood 2020 0.068872 36299 15.3 77.0
Harlem (South) 2021 0.065799 47113 18.8 87.4
Harlem (North) 2022 0.064805 83327 25.1 82.9
East Harlem (North) 2021 0.061867 64655 32.3 75.9
Harlem (South) 2022 0.057309 47113 18.8 87.4
Harlem (North) 2020 0.055204 83327 25.1 82.9
East Harlem (South) 2021 0.055171 59814 29.0 78.3

BROOKLYN

Table showing Top 10 NTAs with the Highest Total Shooting Incidents (2017-2023) in BROOKLYN

# Brooklyn NTAs ranked by number of shooting records (one row = one incident).
# Demographic columns are taken from the first row of each NTA.
top_10_nta_overall_brooklyn <- df_descriptive %>%
  filter(BORO == "BROOKLYN") %>%
  group_by(BORO, NTA) %>%
  summarise(
    total_incidents = n(),
    total_population = first(Total_population_nta),
    percent_poverty = first(Percent_poverty),
    percent_education = first(Percent_education)
  ) %>%
  ungroup() %>%
  arrange(desc(total_incidents)) %>%
  slice_head(n = 10) %>%
  select(-BORO)

# Render the ranking as a table, rounding numeric columns to 2 decimals
knitr::kable(
  top_10_nta_overall_brooklyn,
  caption = "Top 10 NTAs with the Highest Total Shooting Incidents in BROOKLYN (2017-2023)",
  digits = 2
)
Top 10 NTAs with the Highest Total Shooting Incidents in BROOKLYN (2017-2023)
NTA total_incidents total_population percent_poverty percent_education
Brownsville 350 60470 37.2 76.9
Bedford-Stuyvesant (East) 323 84653 22.2 87.8
Crown Heights (North) 235 85275 20.5 84.1
Bedford-Stuyvesant (West) 207 89189 26.0 84.8
East New York-New Lots 205 53004 25.3 82.6
Canarsie 166 89932 13.0 88.1
East New York (North) 148 42818 25.5 79.4
East Flatbush-Remsen Village 126 39115 18.7 83.7
Ocean Hill 120 37952 27.3 83.1
Flatbush 111 66503 13.7 87.8

Table showing Top 10 NTAs with the Highest Annual Shooting Incident Rate in BROOKLYN

# Top 10 residential NTA-years in Brooklyn by annual incident rate.
#
# Rows are collapsed to one per (NTA, Year) after filtering. Deduplicating on
# the rate value itself would merge distinct NTA-years that share a rate and,
# because it ran before the borough filter, could keep a row from another
# borough that is then filtered away.
# NOTE(review): assumes `incident_rate_by_year_nta` is constant within an
# NTA-year - confirm against the data preparation step.
top_10_incident_rate_brooklyn <- df_descriptive %>%
  filter(BORO == "BROOKLYN", NTAType == "Residential") %>%
  distinct(NTA, Year, .keep_all = TRUE) %>%
  arrange(desc(incident_rate_by_year_nta)) %>%
  slice_head(n = 10)

# Keep only the columns shown in the table, with reader-friendly names
top_10_nta_incident_rate_brooklyn <- top_10_incident_rate_brooklyn %>%
  select(
    NTA, Year,
    incident_rate = incident_rate_by_year_nta,
    total_population = Total_population_nta,
    percent_poverty = Percent_poverty,
    percent_education = Percent_education
  )

# Display the table; caption wording matches the other borough tables
kable(
  top_10_nta_incident_rate_brooklyn,
  digits = 6,
  caption = "Top 10 NTAs with the Highest Annual Incident Rate in BROOKLYN"
)
Top 10 NTAs with the Annual Highest Incident Rate in BROOKLYN
NTA Year incident_rate total_population percent_poverty percent_education
Brownsville 2020 0.153795 60470 37.2 76.9
East New York-New Lots 2020 0.107539 53004 25.3 82.6
Brownsville 2022 0.099223 60470 37.2 76.9
Brownsville 2021 0.097569 60470 37.2 76.9
Bedford-Stuyvesant (East) 2020 0.088597 84653 22.2 87.8
East New York (North) 2020 0.079406 42818 25.5 79.4
East New York-New Lots 2021 0.079239 53004 25.3 82.6
East Flatbush-Remsen Village 2020 0.071584 39115 18.7 83.7
Ocean Hill 2021 0.071142 37952 27.3 83.1
Brownsville 2018 0.069456 60470 37.2 76.9

BRONX

Table showing Top 10 NTAs with the Highest Total Shooting Incidents (2017-2023) in BRONX

# Bronx NTAs ranked by number of shooting records (one row = one incident).
# Demographic columns are taken from the first row of each NTA.
top_10_nta_overall_bronx <- df_descriptive %>%
  filter(BORO == "BRONX") %>%
  group_by(BORO, NTA) %>%
  summarise(
    total_incidents = n(),
    total_population = first(Total_population_nta),
    percent_poverty = first(Percent_poverty),
    percent_education = first(Percent_education)
  ) %>%
  ungroup() %>%
  arrange(desc(total_incidents)) %>%
  slice_head(n = 10) %>%
  select(-BORO)

# Render the ranking as a table, rounding numeric columns to 2 decimals
knitr::kable(
  top_10_nta_overall_bronx,
  caption = "Top 10 NTAs with the Highest Total Shooting Incidents in BRONX (2017-2023)",
  digits = 2
)
Top 10 NTAs with the Highest Total Shooting Incidents in BRONX (2017-2023)
NTA total_incidents total_population percent_poverty percent_education
Mott Haven-Port Morris 207 57718 43.7 62.0
Concourse-Concourse Village 169 69387 31.9 70.5
Mount Eden-Claremont (West) 166 49651 31.8 67.9
Williamsbridge-Olinville 158 61346 21.0 76.6
Melrose 152 42651 40.7 68.2
Mount Hope 149 49099 30.2 66.8
Tremont 138 32150 36.3 71.4
Longwood 130 40289 32.0 66.9
Belmont 116 35825 39.6 65.6
Fordham Heights 112 32099 33.2 66.0

Table showing Top 10 NTAs with the Highest Annual Shooting Incident Rate in BRONX

# Top 10 residential NTA-years in the Bronx by annual incident rate.
#
# Rows are collapsed to one per (NTA, Year) after filtering. Deduplicating on
# the rate value itself would merge distinct NTA-years that share a rate and,
# because it ran before the borough filter, could keep a row from another
# borough that is then filtered away.
# NOTE(review): assumes `incident_rate_by_year_nta` is constant within an
# NTA-year - confirm against the data preparation step.
top_10_incident_rate_bronx <- df_descriptive %>%
  filter(BORO == "BRONX", NTAType == "Residential") %>%
  distinct(NTA, Year, .keep_all = TRUE) %>%
  arrange(desc(incident_rate_by_year_nta)) %>%
  slice_head(n = 10)

# Keep only the columns shown in the table, with reader-friendly names
top_10_nta_incident_rate_bronx <- top_10_incident_rate_bronx %>%
  select(
    NTA, Year,
    incident_rate = incident_rate_by_year_nta,
    total_population = Total_population_nta,
    percent_poverty = Percent_poverty,
    percent_education = Percent_education
  )

# Display the table in the desired format
kable(
  top_10_nta_incident_rate_bronx,
  digits = 6,
  caption = "Top 10 NTAs with the Highest Annual Incident Rate in BRONX"
)
Top 10 NTAs with the Highest Annual Incident Rate in BRONX
NTA Year incident_rate total_population percent_poverty percent_education
Claremont Village-Claremont (East) 2021 0.187350 24553 43.2 61.7
Tremont 2020 0.118196 32150 36.3 71.4
Hunts Point 2021 0.105743 15131 34.3 72.2
Mount Eden-Claremont (West) 2021 0.100703 49651 31.8 67.9
Tremont 2023 0.096423 32150 36.3 71.4
West Farms 2021 0.094307 20147 44.8 61.1
Longwood 2021 0.091836 40289 32.0 66.9
Mount Hope 2023 0.089615 49099 30.2 66.8
West Farms 2022 0.084380 20147 44.8 61.1
Morrisania 2020 0.082431 37607 35.7 65.9

QUEENS

Table showing Top 10 NTAs with the Highest Total Shooting Incidents (2017-2023) in QUEENS

# Queens NTAs ranked by number of shooting records (one row = one incident).
# Demographic columns are taken from the first row of each NTA.
top_10_nta_overall_queens <- df_descriptive %>%
  filter(BORO == "QUEENS") %>%
  group_by(BORO, NTA) %>%
  summarise(
    total_incidents = n(),
    total_population = first(Total_population_nta),
    percent_poverty = first(Percent_poverty),
    percent_education = first(Percent_education)
  ) %>%
  ungroup() %>%
  arrange(desc(total_incidents)) %>%
  slice_head(n = 10) %>%
  select(-BORO)

# Render the ranking as a table, rounding numeric columns to 2 decimals
knitr::kable(
  top_10_nta_overall_queens,
  caption = "Top 10 NTAs with the Highest Total Shooting Incidents in QUEENS (2017-2023)",
  digits = 2
)
Top 10 NTAs with the Highest Total Shooting Incidents in QUEENS (2017-2023)
NTA total_incidents total_population percent_poverty percent_education
Far Rockaway-Bayswater 116 58648 18.3 79.9
Rockaway Beach-Arverne-Edgemere 100 41367 22.1 82.1
St. Albans 93 51816 8.6 88.8
South Jamaica 91 44401 13.4 78.9
Baisley Park 83 43090 11.4 83.5
Jamaica 82 60993 15.7 70.4
Queensbridge-Ravenswood-Dutch Kills 79 32954 16.5 85.0
South Ozone Park 66 79540 9.7 76.7
Richmond Hill 55 34100 10.6 79.8
Laurelton 45 26088 5.4 91.2

Table showing Top 10 NTAs with the Highest Annual Shooting Incident Rate in QUEENS

# Top 10 residential NTA-years in Queens by annual incident rate.
#
# Rows are collapsed to one per (NTA, Year) after filtering. Deduplicating on
# the rate value itself would merge distinct NTA-years that share a rate and,
# because it ran before the borough filter, could keep a row from another
# borough that is then filtered away.
# NOTE(review): assumes `incident_rate_by_year_nta` is constant within an
# NTA-year - confirm against the data preparation step.
top_10_incident_rate_queens <- df_descriptive %>%
  filter(BORO == "QUEENS", NTAType == "Residential") %>%
  distinct(NTA, Year, .keep_all = TRUE) %>%
  arrange(desc(incident_rate_by_year_nta)) %>%
  slice_head(n = 10)

# Keep only the columns shown in the table, with reader-friendly names
top_10_nta_incident_rate_queens <- top_10_incident_rate_queens %>%
  select(
    NTA, Year,
    incident_rate = incident_rate_by_year_nta,
    total_population = Total_population_nta,
    percent_poverty = Percent_poverty,
    percent_education = Percent_education
  )

# Display the table in the desired format
kable(
  top_10_nta_incident_rate_queens,
  digits = 6,
  caption = "Top 10 NTAs with the Highest Annual Incident Rate in QUEENS"
)
Top 10 NTAs with the Highest Annual Incident Rate in QUEENS
NTA Year incident_rate total_population percent_poverty percent_education
South Jamaica 2020 0.069818 44401 13.4 78.9
Rockaway Beach-Arverne-Edgemere 2020 0.067687 41367 22.1 82.1
Far Rockaway-Bayswater 2020 0.064793 58648 18.3 79.9
Queensbridge-Ravenswood-Dutch Kills 2020 0.054622 32954 16.5 85.0
Laurelton 2020 0.053665 26088 5.4 91.2
Rockaway Beach-Arverne-Edgemere 2021 0.050765 41367 22.1 82.1
Richmond Hill 2023 0.046921 34100 10.6 79.8
Queensbridge-Ravenswood-Dutch Kills 2019 0.045518 32954 16.5 85.0
St. Albans 2021 0.044388 51816 8.6 88.8
Richmond Hill 2022 0.043988 34100 10.6 79.8

STATEN ISLAND

Table showing Top 10 NTAs with the Highest Total Shooting Incidents (2017-2023) in STATEN ISLAND

# Staten Island NTAs ranked by number of shooting records (one row = one
# incident). Demographic columns are taken from the first row of each NTA.
top_10_nta_overall_staten <- df_descriptive %>%
  filter(BORO == "STATEN ISLAND") %>%
  group_by(BORO, NTA) %>%
  summarise(
    total_incidents = n(),
    total_population = first(Total_population_nta),
    percent_poverty = first(Percent_poverty),
    percent_education = first(Percent_education)
  ) %>%
  ungroup() %>%
  arrange(desc(total_incidents)) %>%
  slice_head(n = 10) %>%
  select(-BORO)

# Render the ranking as a table, rounding numeric columns to 2 decimals
knitr::kable(
  top_10_nta_overall_staten,
  caption = "Top 10 NTAs with the Highest Total Shooting Incidents in STATEN ISLAND (2017-2023)",
  digits = 2
)
Top 10 NTAs with the Highest Total Shooting Incidents in STATEN ISLAND (2017-2023)
NTA total_incidents total_population percent_poverty percent_education
St. George-New Brighton 48 20549 18.5 79.9
Mariner’s Harbor-Arlington-Graniteville 46 33492 16.8 83.3
Tompkinsville-Stapleton-Clifton-Fox Hills 42 19027 25.5 79.2
Rosebank-Shore Acres-Park Hill 34 25510 14.4 81.7
West New Brighton-Silver Lake-Grymes Hill 26 37010 9.9 91.2
Port Richmond 23 22609 22.0 83.2
Grasmere-Arrochar-South Beach-Dongan Hills 8 36259 11.3 85.4
Annadale-Huguenot-Prince’s Bay-Woodrow 6 40534 6.0 92.6
New Dorp-Midland Beach 5 29083 8.5 84.6
Arden Heights-Rossville 4 30683 4.3 92.3

Table showing Top 10 NTAs with the Highest Annual Shooting Incident Rate in STATEN ISLAND

# Top 10 residential NTA-years in Staten Island by annual incident rate.
#
# Rows are collapsed to one per (NTA, Year) after filtering. Deduplicating on
# the rate value itself would merge distinct NTA-years that share a rate and,
# because it ran before the borough filter, could keep a row from another
# borough that is then filtered away.
# NOTE(review): assumes `incident_rate_by_year_nta` is constant within an
# NTA-year - confirm against the data preparation step.
top_10_incident_rate_staten <- df_descriptive %>%
  filter(BORO == "STATEN ISLAND", NTAType == "Residential") %>%
  distinct(NTA, Year, .keep_all = TRUE) %>%
  arrange(desc(incident_rate_by_year_nta)) %>%
  slice_head(n = 10)

# Keep only the columns shown in the table, with reader-friendly names
top_10_nta_incident_rate_staten <- top_10_incident_rate_staten %>%
  select(
    NTA, Year,
    incident_rate = incident_rate_by_year_nta,
    total_population = Total_population_nta,
    percent_poverty = Percent_poverty,
    percent_education = Percent_education
  )

# Display the table in the desired format
kable(
  top_10_nta_incident_rate_staten,
  digits = 6,
  caption = "Top 10 NTAs with the Highest Annual Incident Rate in STATEN ISLAND"
)
Top 10 NTAs with the Highest Annual Incident Rate in STATEN ISLAND
NTA Year incident_rate total_population percent_poverty percent_education
St. George-New Brighton 2023 0.058397 20549 18.5 79.9
Tompkinsville-Stapleton-Clifton-Fox Hills 2021 0.052557 19027 25.5 79.2
Mariner’s Harbor-Arlington-Graniteville 2020 0.047773 33492 16.8 83.3
Port Richmond 2020 0.044230 22609 22.0 83.2
St. George-New Brighton 2021 0.043798 20549 18.5 79.9
St. George-New Brighton 2017 0.038931 20549 18.5 79.9
Mariner’s Harbor-Arlington-Graniteville 2017 0.038815 33492 16.8 83.3
Tompkinsville-Stapleton-Clifton-Fox Hills 2017 0.036790 19027 25.5 79.2
Rosebank-Shore Acres-Park Hill 2020 0.035280 25510 14.4 81.7
St. George-New Brighton 2022 0.034065 20549 18.5 79.9

EDA sorted by NYC CDTAs

Process of data manipulation

# Prepare per-CDTA incident counts and attach them to the CDTA shapes.
# NOTE(review): `cdta_shape` is not created in the visible part of this
# document; it must already exist with a `CDTA2020` column.

# CDTA codes contain a space between the letters and the number. Strip all
# spaces (not only trailing ones) on both sides so the join keys match.
data_final$CDTA <- gsub(" ", "", data_final$CDTA, fixed = TRUE)
cdta_shape$CDTA2020 <- gsub(" ", "", cdta_shape$CDTA2020, fixed = TRUE)

# One row per CDTA with its number of incident records
cdta_incident_counts <- data_final %>%
  count(CDTA, name = "Number_of_Incidents")

# CDTAs present in the shapefile but absent from the incident data
unmatched_cdta <- setdiff(cdta_shape$CDTA2020, data_final$CDTA)

# Join counts onto the shapes; CDTAs without any incident get a count of 0
# and every CDTA is assigned to one of five 120-wide ranges for plotting
cdta_map_data <- cdta_shape %>%
  left_join(cdta_incident_counts, by = c("CDTA2020" = "CDTA")) %>%
  mutate(
    Number_of_Incidents = coalesce(Number_of_Incidents, 0L),
    Incident_Range = cut(
      Number_of_Incidents,
      breaks = seq(0, 600, by = 120),
      labels = c("0-120", "121-240", "241-360", "361-480", "481-600"),
      include.lowest = TRUE
    )
  )

# Counts above the last break would silently become NA in `Incident_Range`
if (any(cdta_map_data$Number_of_Incidents > 600)) {
  warning(
    "Some CDTAs have more than 600 incidents and fall outside the plotted ranges.",
    call. = FALSE
  )
}

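The CDTAs “QN80”, “SI95”, and “QN84” have no matching records in the incident data, i.e. no shooting incidents were recorded in them, so their counts come back as NA after the join. These three CDTAs are therefore assigned 0 incidents.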

Plot of Total Number of Incidents Across NYC CDTAs from 2017 to 2023

# Choropleth of incidents per CDTA: polygons are filled by the binned
# `Incident_Range` and labelled with the raw `Number_of_Incidents`.
ggplot(cdta_map_data) +
  geom_sf(mapping = aes(fill = Incident_Range), size = 0.2, color = "white") +
  # Print each district's incident count on top of its polygon
  geom_sf_text(mapping = aes(label = Number_of_Incidents), color = "black", size = 3) +
  scale_fill_manual(
    name = "Number of Incidents",
    # One fill colour per incident range, lightest for the lowest range
    values = setNames(
      c("#b2e2e2", "skyblue", "#66c2a4", "#2ca25f", "#006d2c"),
      c("0-120", "121-240", "241-360", "361-480", "481-600")
    )
  ) +
  labs(
    subtitle = "Incidents grouped by range (0-600, 120 breaks)",
    title = "Total Number of Incidents Across NYC CDTAs from 2017 to 2023"
  ) +
  theme_minimal() +
  # Strip axes and grid lines, which carry no information on a map
  theme(
    panel.grid = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  )

The map shows NYC CDTA incidents (2017–2023) using a gradient from light blue (fewer incidents) to dark green (more incidents), with counts labeled on each district. Dark green areas highlight hotspots, likely in densely populated regions, while lighter blue areas, like Staten Island, show fewer incidents. This visualization helps identify trends and prioritize safety efforts.

<<<<<<< HEAD

Plots of CDTA Incidents in each Borough from 2017 to 2023

=======

Top 10 NTAs with the Highest Total Shooting Incidents in each boro

MANHATTAN

# Manhattan NTAs ranked by number of shooting records (one row = one incident).
# Demographic columns are taken from the first row of each NTA.
top_10_nta_overall_manhattan <- df_descriptive %>%
  filter(BORO == "MANHATTAN") %>%
  group_by(BORO, NTA) %>%
  summarise(
    total_incidents = n(),
    total_population = first(Total_population_nta),
    percent_poverty = first(Percent_poverty),
    percent_education = first(Percent_education)
  ) %>%
  ungroup() %>%
  arrange(desc(total_incidents)) %>%
  slice_head(n = 10) %>%
  select(BORO, NTA, total_incidents:percent_education)

# Render the ranking as a table, rounding numeric columns to 2 decimals
knitr::kable(
  top_10_nta_overall_manhattan,
  caption = "Top 10 NTAs with the Highest Total Shooting Incidents in MANHATTAN",
  digits = 2
)
Top 10 NTAs with the Highest Total Shooting Incidents in MANHATTAN
BORO NTA total_incidents total_population percent_poverty percent_education
MANHATTAN Harlem (North) 253 83327 25.1 82.9
MANHATTAN East Harlem (North) 229 64655 32.3 75.9
MANHATTAN East Harlem (South) 145 59814 29.0 78.3
MANHATTAN Harlem (South) 130 47113 18.8 87.4
MANHATTAN Inwood 105 36299 15.3 77.0
MANHATTAN Washington Heights (South) 88 72037 19.9 70.3
MANHATTAN Washington Heights (North) 67 71842 14.8 79.3
MANHATTAN Hamilton Heights-Sugar Hill 54 49410 24.0 78.6
MANHATTAN Manhattanville-West Harlem 46 22183 26.7 77.4
MANHATTAN East Village 41 71436 24.3 87.7

BROOKLYN

# Brooklyn NTAs ranked by number of shooting records (one row = one incident).
# Demographic columns are taken from the first row of each NTA.
top_10_nta_overall_brooklyn <- df_descriptive %>%
  filter(BORO == "BROOKLYN") %>%
  group_by(BORO, NTA) %>%
  summarise(
    total_incidents = n(),
    total_population = first(Total_population_nta),
    percent_poverty = first(Percent_poverty),
    percent_education = first(Percent_education)
  ) %>%
  ungroup() %>%
  arrange(desc(total_incidents)) %>%
  slice_head(n = 10) %>%
  select(BORO, NTA, total_incidents:percent_education)

# Render the ranking as a table, rounding numeric columns to 2 decimals
knitr::kable(
  top_10_nta_overall_brooklyn,
  caption = "Top 10 NTAs with the Highest Total Shooting Incidents in BROOKLYN",
  digits = 2
)
Top 10 NTAs with the Highest Total Shooting Incidents in BROOKLYN
BORO NTA total_incidents total_population percent_poverty percent_education
BROOKLYN Brownsville 350 60470 37.2 76.9
BROOKLYN Bedford-Stuyvesant (East) 323 84653 22.2 87.8
BROOKLYN Crown Heights (North) 235 85275 20.5 84.1
BROOKLYN Bedford-Stuyvesant (West) 207 89189 26.0 84.8
BROOKLYN East New York-New Lots 205 53004 25.3 82.6
BROOKLYN Canarsie 166 89932 13.0 88.1
BROOKLYN East New York (North) 148 42818 25.5 79.4
BROOKLYN East Flatbush-Remsen Village 126 39115 18.7 83.7
BROOKLYN Ocean Hill 120 37952 27.3 83.1
BROOKLYN Flatbush 111 66503 13.7 87.8

BRONX

# Bronx NTAs ranked by number of shooting records (one row = one incident).
# Demographic columns are taken from the first row of each NTA.
top_10_nta_overall_bronx <- df_descriptive %>%
  filter(BORO == "BRONX") %>%
  group_by(BORO, NTA) %>%
  summarise(
    total_incidents = n(),
    total_population = first(Total_population_nta),
    percent_poverty = first(Percent_poverty),
    percent_education = first(Percent_education)
  ) %>%
  ungroup() %>%
  arrange(desc(total_incidents)) %>%
  slice_head(n = 10) %>%
  select(BORO, NTA, total_incidents:percent_education)

# Render the ranking as a table, rounding numeric columns to 2 decimals
knitr::kable(
  top_10_nta_overall_bronx,
  caption = "Top 10 NTAs with the Highest Total Shooting Incidents in BRONX",
  digits = 2
)
Top 10 NTAs with the Highest Total Shooting Incidents in BRONX
BORO NTA total_incidents total_population percent_poverty percent_education
BRONX Mott Haven-Port Morris 207 57718 43.7 62.0
BRONX Concourse-Concourse Village 169 69387 31.9 70.5
BRONX Mount Eden-Claremont (West) 166 49651 31.8 67.9
BRONX Williamsbridge-Olinville 158 61346 21.0 76.6
BRONX Melrose 152 42651 40.7 68.2
BRONX Mount Hope 149 49099 30.2 66.8
BRONX Tremont 138 32150 36.3 71.4
BRONX Longwood 130 40289 32.0 66.9
BRONX Belmont 116 35825 39.6 65.6
BRONX Fordham Heights 112 32099 33.2 66.0

QUEENS

# Queens NTAs ranked by number of shooting records (one row = one incident).
# Demographic columns are taken from the first row of each NTA.
top_10_nta_overall_queens <- df_descriptive %>%
  filter(BORO == "QUEENS") %>%
  group_by(BORO, NTA) %>%
  summarise(
    total_incidents = n(),
    total_population = first(Total_population_nta),
    percent_poverty = first(Percent_poverty),
    percent_education = first(Percent_education)
  ) %>%
  ungroup() %>%
  arrange(desc(total_incidents)) %>%
  slice_head(n = 10) %>%
  select(BORO, NTA, total_incidents:percent_education)

# Render the ranking as a table, rounding numeric columns to 2 decimals
knitr::kable(
  top_10_nta_overall_queens,
  caption = "Top 10 NTAs with the Highest Total Shooting Incidents in QUEENS",
  digits = 2
)
Top 10 NTAs with the Highest Total Shooting Incidents in QUEENS
BORO NTA total_incidents total_population percent_poverty percent_education
QUEENS Far Rockaway-Bayswater 116 58648 18.3 79.9
QUEENS Rockaway Beach-Arverne-Edgemere 100 41367 22.1 82.1
QUEENS St. Albans 93 51816 8.6 88.8
QUEENS South Jamaica 91 44401 13.4 78.9
QUEENS Baisley Park 83 43090 11.4 83.5
QUEENS Jamaica 82 60993 15.7 70.4
QUEENS Queensbridge-Ravenswood-Dutch Kills 79 32954 16.5 85.0
QUEENS South Ozone Park 66 79540 9.7 76.7
QUEENS Richmond Hill 55 34100 10.6 79.8
QUEENS Laurelton 45 26088 5.4 91.2

STATEN ISLAND

# Staten Island NTAs ranked by number of shooting records (one row = one
# incident). Demographic columns are taken from the first row of each NTA.
top_10_nta_overall_staten <- df_descriptive %>%
  filter(BORO == "STATEN ISLAND") %>%
  group_by(BORO, NTA) %>%
  summarise(
    total_incidents = n(),
    total_population = first(Total_population_nta),
    percent_poverty = first(Percent_poverty),
    percent_education = first(Percent_education)
  ) %>%
  ungroup() %>%
  arrange(desc(total_incidents)) %>%
  slice_head(n = 10) %>%
  select(BORO, NTA, total_incidents:percent_education)

# Render the ranking as a table, rounding numeric columns to 2 decimals
knitr::kable(
  top_10_nta_overall_staten,
  caption = "Top 10 NTAs with the Highest Total Shooting Incidents in STATEN ISLAND",
  digits = 2
)
Top 10 NTAs with the Highest Total Shooting Incidents in STATEN ISLAND
BORO NTA total_incidents total_population percent_poverty percent_education
STATEN ISLAND St. George-New Brighton 48 20549 18.5 79.9
STATEN ISLAND Mariner’s Harbor-Arlington-Graniteville 46 33492 16.8 83.3
STATEN ISLAND Tompkinsville-Stapleton-Clifton-Fox Hills 42 19027 25.5 79.2
STATEN ISLAND Rosebank-Shore Acres-Park Hill 34 25510 14.4 81.7
STATEN ISLAND West New Brighton-Silver Lake-Grymes Hill 26 37010 9.9 91.2
STATEN ISLAND Port Richmond 23 22609 22.0 83.2
STATEN ISLAND Grasmere-Arrochar-South Beach-Dongan Hills 8 36259 11.3 85.4
STATEN ISLAND Annadale-Huguenot-Prince’s Bay-Woodrow 6 40534 6.0 92.6
STATEN ISLAND New Dorp-Midland Beach 5 29083 8.5 84.6
STATEN ISLAND Arden Heights-Rossville 4 30683 4.3 92.3

Top 10 NTAs with the Highest Incident Rate in each boro

MANHATTAN

# Top 10 residential NTA-years in Manhattan by annual incident rate.
#
# Rows are collapsed to one per (NTA, Year) after filtering. Deduplicating on
# the rate value itself would merge distinct NTA-years that share a rate and,
# because it ran before the borough filter, could keep a row from another
# borough that is then filtered away.
# NOTE(review): assumes `incident_rate_by_year_nta` is constant within an
# NTA-year - confirm against the data preparation step.
top_10_incident_rate_manhattan <- df_descriptive %>%
  filter(BORO == "MANHATTAN", NTAType == "Residential") %>%
  distinct(NTA, Year, .keep_all = TRUE) %>%
  arrange(desc(incident_rate_by_year_nta)) %>%
  slice_head(n = 10)

# Keep the columns shown in the table. `Year` is included because the same
# NTA can appear several times (once per year) and would otherwise be
# indistinguishable.
top_10_nta_incident_rate_manhattan <- top_10_incident_rate_manhattan %>%
  select(
    NTA, BORO, Year,
    incident_rate = incident_rate_by_year_nta,
    total_population = Total_population_nta,
    percent_poverty = Percent_poverty,
    percent_education = Percent_education
  )

# Display the table in the desired format
kable(
  top_10_nta_incident_rate_manhattan,
  digits = 6,
  caption = "Top 10 NTAs with the Highest Incident Rate by Year in MANHATTAN"
)
Top 10 NTAs with the Highest Incident Rate by Year in MANHATTAN
NTA BORO incident_rate total_population percent_poverty percent_education
Inwood MANHATTAN 0.115706 36299 15.3 77.0
East Harlem (North) MANHATTAN 0.086614 64655 32.3 75.9
Harlem (North) MANHATTAN 0.080406 83327 25.1 82.9
Inwood MANHATTAN 0.068872 36299 15.3 77.0
Harlem (South) MANHATTAN 0.065799 47113 18.8 87.4
Harlem (North) MANHATTAN 0.064805 83327 25.1 82.9
East Harlem (North) MANHATTAN 0.061867 64655 32.3 75.9
Harlem (South) MANHATTAN 0.057309 47113 18.8 87.4
Harlem (North) MANHATTAN 0.055204 83327 25.1 82.9
East Harlem (South) MANHATTAN 0.055171 59814 29.0 78.3

BROOKLYN

# Rank Brooklyn's residential NTA records by annual incident rate and keep the
# ten highest.
#
# Filtering happens before de-duplication, and de-duplication is keyed on
# NTA + rate: calling distinct() on the rate alone over the full dataset keeps
# just the first row per rate value, so a Brooklyn record whose rate ties with
# an earlier row from another borough or NTA type would be silently lost.
# NOTE(review): an NTA may appear more than once (presumably once per year);
# confirm the year column name if it should be displayed.
top_10_incident_rate_brooklyn <- df_descriptive %>%
  filter(BORO == "BROOKLYN", NTAType == "Residential") %>%
  distinct(NTA, incident_rate_by_year_nta, .keep_all = TRUE) %>%
  arrange(desc(incident_rate_by_year_nta)) %>%
  slice_head(n = 10)

# Keep only the columns shown in the report, with reader-friendly names.
top_10_nta_incident_rate_brooklyn <- top_10_incident_rate_brooklyn %>%
  select(
    NTA,
    BORO,
    incident_rate = incident_rate_by_year_nta,
    total_population = Total_population_nta,
    percent_poverty = Percent_poverty,
    percent_education = Percent_education
  )

# Render the table; six digits are needed because the rates are small.
kable(
  top_10_nta_incident_rate_brooklyn,
  digits = 6,
  caption = "Top 10 NTAs with the Highest Incident Rate by Year in BROOKLYN"
)
Top 10 NTAs with the Highest Incident Rate by Year in BROOKLYN
NTA BORO incident_rate total_population percent_poverty percent_education
Brownsville BROOKLYN 0.153795 60470 37.2 76.9
East New York-New Lots BROOKLYN 0.107539 53004 25.3 82.6
Brownsville BROOKLYN 0.099223 60470 37.2 76.9
Brownsville BROOKLYN 0.097569 60470 37.2 76.9
Bedford-Stuyvesant (East) BROOKLYN 0.088597 84653 22.2 87.8
East New York (North) BROOKLYN 0.079406 42818 25.5 79.4
East New York-New Lots BROOKLYN 0.079239 53004 25.3 82.6
East Flatbush-Remsen Village BROOKLYN 0.071584 39115 18.7 83.7
Ocean Hill BROOKLYN 0.071142 37952 27.3 83.1
Brownsville BROOKLYN 0.069456 60470 37.2 76.9

BRONX

# Rank the Bronx's residential NTA records by annual incident rate and keep
# the ten highest.
#
# Rows are restricted to the borough and NTA type first, then de-duplicated on
# NTA + rate. De-duplicating on the rate alone before filtering retains only
# the first row per rate value across all boroughs, which can discard a Bronx
# record that shares its rate with a row that is filtered out afterwards.
# NOTE(review): an NTA may appear more than once (presumably once per year);
# confirm the year column name if it should be displayed.
top_10_incident_rate_bronx <- df_descriptive %>%
  filter(BORO == "BRONX", NTAType == "Residential") %>%
  distinct(NTA, incident_rate_by_year_nta, .keep_all = TRUE) %>%
  arrange(desc(incident_rate_by_year_nta)) %>%
  slice_head(n = 10)

# Keep only the columns shown in the report, with reader-friendly names.
top_10_nta_incident_rate_bronx <- top_10_incident_rate_bronx %>%
  select(
    NTA,
    BORO,
    incident_rate = incident_rate_by_year_nta,
    total_population = Total_population_nta,
    percent_poverty = Percent_poverty,
    percent_education = Percent_education
  )

# Render the table; six digits are needed because the rates are small.
kable(
  top_10_nta_incident_rate_bronx,
  digits = 6,
  caption = "Top 10 NTAs with the Highest Incident Rate by Year in BRONX"
)
Top 10 NTAs with the Highest Incident Rate by Year in BRONX
NTA BORO incident_rate total_population percent_poverty percent_education
Claremont Village-Claremont (East) BRONX 0.187350 24553 43.2 61.7
Tremont BRONX 0.118196 32150 36.3 71.4
Hunts Point BRONX 0.105743 15131 34.3 72.2
Mount Eden-Claremont (West) BRONX 0.100703 49651 31.8 67.9
Tremont BRONX 0.096423 32150 36.3 71.4
West Farms BRONX 0.094307 20147 44.8 61.1
Longwood BRONX 0.091836 40289 32.0 66.9
Mount Hope BRONX 0.089615 49099 30.2 66.8
West Farms BRONX 0.084380 20147 44.8 61.1
Morrisania BRONX 0.082431 37607 35.7 65.9

QUEENS

# Rank Queens' residential NTA records by annual incident rate and keep the
# ten highest.
#
# The filter precedes de-duplication and the de-duplication key is NTA + rate.
# A dataset-wide distinct() on the rate alone keeps the first row for each
# rate value regardless of borough, so a Queens record could vanish whenever
# an earlier row elsewhere has the same rate.
# NOTE(review): an NTA may appear more than once (presumably once per year);
# confirm the year column name if it should be displayed.
top_10_incident_rate_queens <- df_descriptive %>%
  filter(BORO == "QUEENS", NTAType == "Residential") %>%
  distinct(NTA, incident_rate_by_year_nta, .keep_all = TRUE) %>%
  arrange(desc(incident_rate_by_year_nta)) %>%
  slice_head(n = 10)

# Keep only the columns shown in the report, with reader-friendly names.
top_10_nta_incident_rate_queens <- top_10_incident_rate_queens %>%
  select(
    NTA,
    BORO,
    incident_rate = incident_rate_by_year_nta,
    total_population = Total_population_nta,
    percent_poverty = Percent_poverty,
    percent_education = Percent_education
  )

# Render the table; six digits are needed because the rates are small.
kable(
  top_10_nta_incident_rate_queens,
  digits = 6,
  caption = "Top 10 NTAs with the Highest Incident Rate by Year in QUEENS"
)
Top 10 NTAs with the Highest Incident Rate by Year in QUEENS
NTA BORO incident_rate total_population percent_poverty percent_education
South Jamaica QUEENS 0.069818 44401 13.4 78.9
Rockaway Beach-Arverne-Edgemere QUEENS 0.067687 41367 22.1 82.1
Far Rockaway-Bayswater QUEENS 0.064793 58648 18.3 79.9
Queensbridge-Ravenswood-Dutch Kills QUEENS 0.054622 32954 16.5 85.0
Laurelton QUEENS 0.053665 26088 5.4 91.2
Rockaway Beach-Arverne-Edgemere QUEENS 0.050765 41367 22.1 82.1
Richmond Hill QUEENS 0.046921 34100 10.6 79.8
Queensbridge-Ravenswood-Dutch Kills QUEENS 0.045518 32954 16.5 85.0
St. Albans QUEENS 0.044388 51816 8.6 88.8
Richmond Hill QUEENS 0.043988 34100 10.6 79.8

STATEN ISLAND

# Rank Staten Island's residential NTA records by annual incident rate and
# keep the ten highest.
#
# Borough/type filtering is applied first and duplicates are then removed on
# NTA + rate. Removing duplicates on the rate alone over the whole dataset
# keeps one arbitrary (first) row per rate value; when that row is from a
# different borough or NTA type, the Staten Island record is lost.
# NOTE(review): an NTA may appear more than once (presumably once per year);
# confirm the year column name if it should be displayed.
top_10_incident_rate_staten <- df_descriptive %>%
  filter(BORO == "STATEN ISLAND", NTAType == "Residential") %>%
  distinct(NTA, incident_rate_by_year_nta, .keep_all = TRUE) %>%
  arrange(desc(incident_rate_by_year_nta)) %>%
  slice_head(n = 10)

# Keep only the columns shown in the report, with reader-friendly names.
top_10_nta_incident_rate_staten <- top_10_incident_rate_staten %>%
  select(
    NTA,
    BORO,
    incident_rate = incident_rate_by_year_nta,
    total_population = Total_population_nta,
    percent_poverty = Percent_poverty,
    percent_education = Percent_education
  )

# Render the table; six digits are needed because the rates are small.
kable(
  top_10_nta_incident_rate_staten,
  digits = 6,
  caption = "Top 10 NTAs with the Highest Incident Rate by Year in STATEN ISLAND"
)
Top 10 NTAs with the Highest Incident Rate by Year in STATEN ISLAND
NTA BORO incident_rate total_population percent_poverty percent_education
St. George-New Brighton STATEN ISLAND 0.058397 20549 18.5 79.9
Tompkinsville-Stapleton-Clifton-Fox Hills STATEN ISLAND 0.052557 19027 25.5 79.2
Mariner’s Harbor-Arlington-Graniteville STATEN ISLAND 0.047773 33492 16.8 83.3
Port Richmond STATEN ISLAND 0.044230 22609 22.0 83.2
St. George-New Brighton STATEN ISLAND 0.043798 20549 18.5 79.9
St. George-New Brighton STATEN ISLAND 0.038931 20549 18.5 79.9
Mariner’s Harbor-Arlington-Graniteville STATEN ISLAND 0.038815 33492 16.8 83.3
Tompkinsville-Stapleton-Clifton-Fox Hills STATEN ISLAND 0.036790 19027 25.5 79.2
Rosebank-Shore Acres-Park Hill STATEN ISLAND 0.035280 25510 14.4 81.7
St. George-New Brighton STATEN ISLAND 0.034065 20549 18.5 79.9

Number of Incidents by CDTA in Each Borough from 2017 to 2023

# Draw one map per borough: every CDTA polygon is filled by, and labelled
# with, its Number_of_Incidents value.
# NOTE(review): cdta_map_data is defined outside this section; it is assumed
# to be an sf object with BoroName and Number_of_Incidents columns -- confirm.
boroughs <- unique(cdta_map_data$BoroName)
for (b in boroughs) {
  # Restrict the map data to the current borough.
  borough_data <- cdta_map_data %>%
    filter(BoroName == b)

  plot <- ggplot(data = borough_data) +
    geom_sf(aes(fill = Number_of_Incidents), color = "black") +
    # Print the incident count on top of each polygon.
    geom_sf_text(aes(label = Number_of_Incidents), size = 3, color = "black") +
    # Custom green -> yellow -> red colour scale.
    scale_fill_gradientn(
      colors = c("green", "yellow", "red"),
      name = "Number of Incidents"
    ) +
    labs(
      title = paste("CDTA Incidents in", b),
      subtitle = "2017 to 2023",
      x = "Longitude",
      y = "Latitude"
    ) +
    theme_minimal()

  # ggplot objects are not auto-printed inside a for loop, so print explicitly.
  print(plot)
}